library(GSVA)
library(openxlsx)
library(tidyverse)
library(Biobase)
library(ComplexHeatmap)
library(circlize)
library(hypeR)
PATH <- file.path(Sys.getenv("MLAB"), "projects/brcameta/exosome/4t1_brca_brain_mets/")
TCGAPATH <- file.path(Sys.getenv("CBM"),"TCGA-GDC/")
METABRICPATH <- file.path(Sys.getenv("CBM"), "METABRIC/")
DPATH <- file.path(Sys.getenv("CBM"),"otherStudies/RNAseq/2022-06-03-DenisExosomeBrCaBrainMets/")
do_save <- TRUE
Loading Data
tcga_brca <- readRDS(file.path(TCGAPATH,"/esets_filtered/TCGA-BRCA_2020-03-22_DESeq2_log_filtered_eset.rds"))
metabric <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet.rds"))
metabric_impute <- readRDS(file.path(METABRICPATH, "ESets/metabric_GE_ESet_impute.rds"))
#Signatures
## Obesity signature from Fuentes-Mattei et al., JNCI 2014, Table S3.
obDiff <- read.xlsx( file.path(PATH,"data/JNCI2014_SupplementaryTable3_ObeseSignatureBrCa.xlsx") )
colnames(obDiff)[match(c("Gene.Title.(Definition)","Log.ratio.(Log10)","p-value*"),colnames(obDiff))] <-
c("Gene.Title","Log10.ratio","pvalue")
obSig <- list(
ob.up=dplyr::filter(obDiff,Log10.ratio>0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull(),
ob.dn=dplyr::filter(obDiff,Log10.ratio<0) %>% select(Gene.Symbol) %>% drop_na() %>% distinct() %>% pull())
## Signatures from RNA-Seq experiments
all_sigs <- readRDS(file.path(PATH,"data/signatures_symbol.rds"))
all_sigs <- all_sigs[lapply(all_sigs, length) > 1]
all_sigs_human <- lapply(all_sigs, babelgene::orthologs, species="mouse", human=FALSE)
all_sigs_human <- lapply(all_sigs_human, function (x) x %>% dplyr::pull(human_symbol))
all_sigs_human <- c(all_sigs_human, obSig)
GSVA
# GSVA
if (do_save) {
#subtype_filter <- is.na(tcga_brca$subtype_selected)
#tcga_brca <- tcga_brca[,!subtype_filter]
#tcga_brca <- tcga_brca[,tcga_brca$subtype_selected == "BRCA.LumA"]
#metabric <- metabric[,metabric$Pam50_SUBTYPE == "LumA"]
tcga_gsva <- gsva(tcga_brca, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
metabric_gsva <- gsva(metabric, all_sigs_human, mx.diff=TRUE, verbose=FALSE)
saveRDS(metabric_gsva, file.path(PATH, "data/metabric_gsva_res_obs.rds"))
saveRDS(tcga_gsva, file.path(PATH, "data/tcga_gsva_res_obs.rds"))
} else {
tcga_gsva <- readRDS(file.path(PATH, "data/tcga_gsva_res_obs.rds"))
metabric_gsva <- readRDS(file.path(PATH, "data/metabric_gsva_res_obs.rds"))
}
# Subtype filtering
metabric_gsva <- metabric_gsva[,metabric_gsva$Pam50_SUBTYPE != "NC"]
subtype_filter <- is.na(tcga_gsva$subtype_selected)
tcga_gsva <- tcga_gsva[,!subtype_filter]
pData(tcga_gsva) <- pData(tcga_gsva) %>% mutate(pam50subtype = str_split(subtype_selected, pattern="BRCA.", simplify=TRUE)[,2])
tcga_gsva$ir_c <- t(exprs(tcga_gsva["ir_c_up",]) - exprs(tcga_gsva["ir_c_down"]))
tcga_gsva$is_c <- t(exprs(tcga_gsva["is_c_up",])-exprs(tcga_gsva["is_c_down",]))
tcga_gsva$ir_is <- t(exprs(tcga_gsva["ir_is_up",])-exprs(tcga_gsva["ir_is_down",]))
tcga_gsva$ob <- t(exprs(tcga_gsva["ob.up",])-exprs(tcga_gsva["ob.dn",]))
metabric_gsva$ir_c <- t(exprs(metabric_gsva["ir_c_up",]) - exprs(metabric_gsva["ir_c_down"]))
metabric_gsva$is_c <- t(exprs(metabric_gsva["is_c_up",])-exprs(metabric_gsva["is_c_down",]))
metabric_gsva$ir_is <- t(exprs(metabric_gsva["ir_is_up",])-exprs(metabric_gsva["ir_is_down",]))
metabric_gsva$ob <- t(exprs(metabric_gsva["ob.up",])-exprs(metabric_gsva["ob.dn",]))
Boxplots
col.pam50 <- c(LumA="pink",LumB="red",Her2="yellow",Basal="darkgray",Normal="white")
for (gset in c("ir_c", "is_c", "ir_is", "ob")) {
par(mfrow=c(1,2))
boxplot(pData(tcga_gsva)[,gset] ~ tcga_gsva$pam50subtype, col=col.pam50, las=2,
xlab="pam50",ylab=gset,main="TCGA" )
boxplot(pData(metabric_gsva)[,gset] ~ metabric_gsva$Pam50_SUBTYPE, col=col.pam50, las=2,
xlab="pam50",ylab=gset,main="METABRIC" )
}




Correlation with Obesity
TCGA Heatmap
sample_col <- hcl.colors(n=nlevels(tcga_gsva$pam50subtype %>% factor))
names(sample_col) <- levels(tcga_gsva$pam50subtype %>% factor)
ha.genes <- HeatmapAnnotation(sample_type=tcga_gsva$pam50subtype,
col=list(sample_type=sample_col))
png(file=file.path(PATH, "results/GSVA_tcga_obs_Heatmap.png"))
gsva_heatmap <- Heatmap(t(scale(t(exprs(tcga_gsva)))),
name="Enrichment Score",
col=colorRamp2(c(-3, 0, 3), c("#072448", "white", "#ff6150")),
top_annotation=ha.genes,
cluster_rows=TRUE,
cluster_columns=TRUE,
clustering_distance_rows="euclidean",
clustering_method_rows="ward.D",
clustering_distance_columns="euclidean",
clustering_method_columns="ward.D",
show_parent_dend_line=TRUE,
column_title = "samples",
column_dend_height = unit(5, "mm"),
row_title="Genesets",
row_dend_width = unit(5, "mm"),
show_column_names=FALSE,
show_row_names=TRUE)
draw(gsva_heatmap)
dev.off()
## png
## 2
gsva_heatmap

TCGA Corplot
library(ggcorrplot)
gs_cor <- cor(t(exprs(tcga_gsva)))
ggcorrplot(gs_cor, hc.order = TRUE, type = "lower",
outline.col = "white",
ggtheme = ggplot2::theme_gray,
lab=TRUE,
colors = c("#6D9EC1", "white", "#E46726"))

KS-based enrichment
## Add gene symbols to DESeq tables
dat <- readRDS(file.path(PATH, "data/4T1_mets_sExp.rds"))
deseq_list <- readRDS(file.path(PATH,"data/deseq_list.rds"))
hallmark_genesets <- msigdb_download(species="Mus musculus", category="H")
hallmark_genesets$OBESITY <- obSig
deseq_list <- lapply(deseq_list, function(Z) {
as.data.frame(Z) %>%
tibble::rownames_to_column(var="ensembl_id") %>%
dplyr::mutate(geneID=unlist(rowData(dat)[ensembl_id,"mgi_symbol"]))
})
## rank by up-regulation
rank_signatures_up <- lapply(
deseq_list, function(sig) sig %>%
dplyr::filter(geneID!="") %>%
dplyr::arrange(desc(log2FoldChange)) %>%
dplyr::select(geneID,log2FoldChange) %>%
tibble::deframe())
names(rank_signatures_up) <- paste(names(rank_signatures_up),"up",sep="_")
## rank by down-regulation
rank_signatures_dn <- lapply(
deseq_list,function(sig) sig %>%
dplyr::filter(geneID!="") %>%
dplyr::arrange(log2FoldChange) %>%
dplyr::select(geneID,log2FoldChange) %>%
tibble::deframe())
names(rank_signatures_dn) <- paste(names(rank_signatures_dn),"dn",sep="_")
rank_signatures <- c(rank_signatures_up,rank_signatures_dn)
rank_signatures <- rank_signatures[order(names(rank_signatures),decreasing = TRUE)]
Hallmarks + Pamm
#all_genesets <- c(hallmark_genesets, pamm_genelists)
max_fdr <- 0.05
ks_hall <- hypeR::hypeR(signature=rank_signatures,genesets=hallmark_genesets,test="ks",fdr=max_fdr,plotting=TRUE)
hyp_dots(ks_hall,merge=TRUE,fdr=max_fdr/5,top=25) + ggtitle(paste("FDR ≤", max_fdr/5))

#hypeR::rctbl_build(ks_hall, show_hmaps=FALSE)
IS C UP
print(ks_hall$data$is_c_up$plots[ks_hall$data$is_c_up$data$fdr<=0.01])
## $HALLMARK_INTERFERON_ALPHA_RESPONSE

##
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

##
## $HALLMARK_KRAS_SIGNALING_UP

##
## $HALLMARK_E2F_TARGETS

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_G2M_CHECKPOINT

IS C Down
print(ks_hall$data$is_c_dn$plots[ks_hall$data$is_c_dn$data$fdr<=0.01])
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

##
## $HALLMARK_PROTEIN_SECRETION

##
## $HALLMARK_MTORC1_SIGNALING

##
## $HALLMARK_UV_RESPONSE_DN

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_DNA_REPAIR

##
## $HALLMARK_TGF_BETA_SIGNALING

IR IS Up
print(ks_hall$data$ir_is_up$plots[ks_hall$data$ir_is_up$data$fdr<=0.01])
## $HALLMARK_HYPOXIA

##
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_MITOTIC_SPINDLE

##
## $HALLMARK_UV_RESPONSE_DN

##
## $HALLMARK_TNFA_SIGNALING_VIA_NFKB

##
## $HALLMARK_GLYCOLYSIS

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

IR IS Down
print(ks_hall$data$ir_is_dn$plots[ks_hall$data$ir_is_dn$data$fdr<=0.01])
## $HALLMARK_INTERFERON_ALPHA_RESPONSE

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_INTERFERON_GAMMA_RESPONSE

##
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_ALLOGRAFT_REJECTION

IR C Up
print(ks_hall$data$ir_c_up$plots[ks_hall$data$ir_c_up$data$fdr<=0.01])
## $HALLMARK_G2M_CHECKPOINT

##
## $HALLMARK_E2F_TARGETS

##
## $HALLMARK_HYPOXIA

##
## $HALLMARK_INFLAMMATORY_RESPONSE

##
## $HALLMARK_KRAS_SIGNALING_UP

##
## $HALLMARK_MITOTIC_SPINDLE

##
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_SPERMATOGENESIS

IR C Down
print(ks_hall$data$ir_c_dn$plots[ks_hall$data$ir_c_dn$data$fdr<=0.01])
## $HALLMARK_EPITHELIAL_MESENCHYMAL_TRANSITION

##
## $HALLMARK_MYC_TARGETS_V1

##
## $HALLMARK_PROTEIN_SECRETION

##
## $HALLMARK_ALLOGRAFT_REJECTION

##
## $HALLMARK_DNA_REPAIR

##
## $HALLMARK_OXIDATIVE_PHOSPHORYLATION

##
## $HALLMARK_IL2_STAT5_SIGNALING
